library(tidyverse)
library(here)
library(phyloseq)
library(vegan)
theme_set(theme_bw())
filt_fun <- function (x, min_reads = 100, min_samples = 5) {
(sum(x) > min_reads) & (sum(x > 0) > min_samples)
}
ps <- readRDS(here('data','following_study','ps_rarefied.rds')) %>%
filter_taxa(filt_fun, prune = TRUE)
sample_data(ps)[is.na(sample_data(ps))] <- 'NA'
sam <- data.frame(sample_data(ps))
alpha.diversity <- estimate_richness(ps)
plot_richness(ps, measures = 'Shannon',x = 'Epileptic.or.Control') +
geom_boxplot()
plot_richness(ps, measures = 'Shannon',x = 'Breed.Group..1.') +
geom_boxplot()
ord <- ps %>% ordinate(method = "MDS", distance = 'unifrac')
plot_ordination(ps, ord)
The PCoA plot shows a very separate pattern of four clusters. Let’s take a look at the phlogenetic tree of our data.
plot_tree(ps, "treeonly")
Some of the taxa have extremely long branch. Are they influential?
# Warning: Not all nodes are descendants in this tree
if (require(adephylo)) {dist.root <- adephylo::distRoot(phy_tree(ps))}
## Loading required package: adephylo
## Loading required package: ade4
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
hist(dist.root); dist.root %>% sort(decreasing = TRUE) %>% head(20)
## ASV438 ASV287 ASV484 ASV133 ASV562 ASV699 ASV437 ASV450
## 5.082379 4.267155 4.245708 4.159508 4.154919 4.126327 4.121791 4.088335
## ASV286 ASV478 ASV357 ASV446 ASV454 ASV423 ASV380 ASV213
## 4.077083 4.074518 4.060563 4.045067 4.041199 4.026320 1.631610 1.617836
## ASV644 ASV268 ASV41 ASV201
## 1.569113 1.417223 1.411706 1.406185
detach('package:adephylo', character.only = TRUE, unload = TRUE)
Let’s see which ASV has distance from root longer than 10.
long.branch <- dist.root[dist.root > 5]
long.branch
## ASV438
## 5.082379
Are these ASVs special?
dada2:::pfasta(refseq(ps)[names(long.branch)], ids = names(long.branch))
## >ASV438
## CAAATCCGAACTGCAAAATACTCATGGAAAGAATCGCTCCTGTTTTTATTCCTTCTCCTAACATGGTAAGAACAGAAACTGGCACTCCAAATACAGAAAAACCACAAAGCATACAAATAAAAAGCCATCCGCCTCTCTGTGCCAGAACCTGGAAAAAATATTCTTTTCCAGAAACATCTTCCCCCGCAAAAGCACCCAAAAGGTAAAATACATGGACTGTTTCCTGTTTCCACTGCATCTTCCACAAAAGATTTGGAAGAACCGTTCCC
Result returned from BLAST (web interface):
| ASV | Taxonomy | % identity |
|---|---|---|
| ASV601 | Anaerobutyricum hallii | 95.1 |
| ASV438 | uncultured Blautia sp. | 97.3 |
pwalign::stringDist(refseq(ps)[names(long.branch)])
## dist(0)
Now we remove these outlier ASVs
all.taxa <- taxa_names(ps)
keep.taxa <- all.taxa[!(all.taxa %in% names(long.branch))]
ps.sub <- ps %>% prune_taxa(keep.taxa, .)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
plot_ord <- function(data, factor, method, distance, strata = NULL){
data.ord <- ordinate(data, method = method, distance = distance)
p <- plot_ordination(data, data.ord, color = factor)
p <- p + stat_ellipse(type = "t",geom = "polygon",alpha = 0)
dist.matrix <- phyloseq::distance(data, method=distance)
model <- as.formula(paste0('dist.matrix ~ ', factor))
sam <- data.frame(sample_data(data))
if (is.null(strata) == FALSE){strata.variable <- sam[,strata]} else {strata.variable <- NULL}
permanova <- adonis2(model, data = sam, permutations=1e4, parallel = parallel::detectCores(), strata = strata.variable)
p <- p + ggtitle(str_c(factor,method,distance, sep = ' - '),
subtitle = str_c('p = ', permanova$`Pr(>F)`, '\n',
'R2 = ', permanova$R2))
print(p)
}
dist.matrix <- phyloseq::distance(ps.sub, method='bray')
adonis2(dist.matrix~Household,
data = data.frame(sample_data(ps.sub)),
permutations=1e4,
parallel = parallel::detectCores())
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = dist.matrix ~ Household, data = data.frame(sample_data(ps.sub)), permutations = 10000, parallel = parallel::detectCores())
## Df SumOfSqs R2 F Pr(>F)
## Household 48 11.7531 0.68929 2.2647 9.999e-05 ***
## Residual 49 5.2979 0.31071
## Total 97 17.0510 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot_ord(ps.sub, 'Breed.Group..1.','MDS','bray', strata = 'Household')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot_ord(ps.sub, 'Breed.Group..1.','MDS','wunifrac', strata = 'Household')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot_ord(ps.sub, 'Breed.Group..1.','NMDS','bray', strata = 'Household')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2280626
## Run 1 stress 0.2611366
## Run 2 stress 0.2575884
## Run 3 stress 0.2340801
## Run 4 stress 0.2469225
## Run 5 stress 0.228046
## ... New best solution
## ... Procrustes: rmse 0.001717483 max resid 0.01231734
## Run 6 stress 0.2290783
## Run 7 stress 0.2295038
## Run 8 stress 0.244936
## Run 9 stress 0.258959
## Run 10 stress 0.2349749
## Run 11 stress 0.2654525
## Run 12 stress 0.2251388
## ... New best solution
## ... Procrustes: rmse 0.03214574 max resid 0.2720156
## Run 13 stress 0.2545423
## Run 14 stress 0.2378738
## Run 15 stress 0.2523822
## Run 16 stress 0.2462619
## Run 17 stress 0.2423869
## Run 18 stress 0.228046
## Run 19 stress 0.2552744
## Run 20 stress 0.2485815
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 19: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
plot_ord(ps.sub, 'Breed.Group..1.','NMDS','wunifrac', strata = 'Household')
## Run 0 stress 0.1727684
## Run 1 stress 0.1821764
## Run 2 stress 0.1764189
## Run 3 stress 0.1844472
## Run 4 stress 0.1834722
## Run 5 stress 0.1954316
## Run 6 stress 0.1937002
## Run 7 stress 0.182134
## Run 8 stress 0.180119
## Run 9 stress 0.176871
## Run 10 stress 0.1721278
## ... New best solution
## ... Procrustes: rmse 0.02022695 max resid 0.1658516
## Run 11 stress 0.1819133
## Run 12 stress 0.1793992
## Run 13 stress 0.1832209
## Run 14 stress 0.1939745
## Run 15 stress 0.1782539
## Run 16 stress 0.1881875
## Run 17 stress 0.1933028
## Run 18 stress 0.1798094
## Run 19 stress 0.172178
## ... Procrustes: rmse 0.02523103 max resid 0.1698661
## Run 20 stress 0.1848238
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
The Breed Group has a significant effect.
plot_ord(ps.sub, 'Sex','MDS','bray')
plot_ord(ps.sub, 'Sex','MDS','wunifrac')
plot_ord(ps.sub, 'Sex','NMDS','bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2280626
## Run 1 stress 0.2639103
## Run 2 stress 0.2409075
## Run 3 stress 0.2280614
## ... New best solution
## ... Procrustes: rmse 0.001718876 max resid 0.01159968
## Run 4 stress 0.2353016
## Run 5 stress 0.2294474
## Run 6 stress 0.2538421
## Run 7 stress 0.2592001
## Run 8 stress 0.2247601
## ... New best solution
## ... Procrustes: rmse 0.03895911 max resid 0.2679994
## Run 9 stress 0.234827
## Run 10 stress 0.2505732
## Run 11 stress 0.2434935
## Run 12 stress 0.2598964
## Run 13 stress 0.2291744
## Run 14 stress 0.2509647
## Run 15 stress 0.2509752
## Run 16 stress 0.2397231
## Run 17 stress 0.2489367
## Run 18 stress 0.2332642
## Run 19 stress 0.233264
## Run 20 stress 0.2252835
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 3: no. of iterations >= maxit
## 17: stress ratio > sratmax
plot_ord(ps.sub, 'Sex','NMDS','wunifrac')
## Run 0 stress 0.1727684
## Run 1 stress 0.184859
## Run 2 stress 0.1759862
## Run 3 stress 0.170771
## ... New best solution
## ... Procrustes: rmse 0.0312149 max resid 0.1689884
## Run 4 stress 0.1843082
## Run 5 stress 0.1777124
## Run 6 stress 0.1861215
## Run 7 stress 0.1856292
## Run 8 stress 0.1880841
## Run 9 stress 0.1697572
## ... New best solution
## ... Procrustes: rmse 0.02456608 max resid 0.1683361
## Run 10 stress 0.1822552
## Run 11 stress 0.1862356
## Run 12 stress 0.1756407
## Run 13 stress 0.1840739
## Run 14 stress 0.1721779
## Run 15 stress 0.1722946
## Run 16 stress 0.1858437
## Run 17 stress 0.190155
## Run 18 stress 0.1776999
## Run 19 stress 0.1881333
## Run 20 stress 0.172598
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
plot_ord(ps.sub, 'Spayed.or.Neutered','MDS','bray')
plot_ord(ps.sub, 'Spayed.or.Neutered','MDS','wunifrac')
plot_ord(ps.sub, 'Spayed.or.Neutered','NMDS','bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2280626
## Run 1 stress 0.2483079
## Run 2 stress 0.25055
## Run 3 stress 0.2397114
## Run 4 stress 0.2532627
## Run 5 stress 0.2462098
## Run 6 stress 0.2299009
## Run 7 stress 0.2250451
## ... New best solution
## ... Procrustes: rmse 0.03116062 max resid 0.2722357
## Run 8 stress 0.2433595
## Run 9 stress 0.22476
## ... New best solution
## ... Procrustes: rmse 0.01798441 max resid 0.09439236
## Run 10 stress 0.2537269
## Run 11 stress 0.2537722
## Run 12 stress 0.2249332
## ... Procrustes: rmse 0.004292774 max resid 0.03314972
## Run 13 stress 0.2384274
## Run 14 stress 0.2469523
## Run 15 stress 0.2288698
## Run 16 stress 0.2249066
## ... Procrustes: rmse 0.004956608 max resid 0.04562579
## Run 17 stress 0.2355581
## Run 18 stress 0.2516222
## Run 19 stress 0.2249068
## ... Procrustes: rmse 0.004971236 max resid 0.04574137
## Run 20 stress 0.2438439
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 2: no. of iterations >= maxit
## 18: stress ratio > sratmax
plot_ord(ps.sub, 'Spayed.or.Neutered','NMDS','wunifrac')
## Run 0 stress 0.1727684
## Run 1 stress 0.1798392
## Run 2 stress 0.1727729
## ... Procrustes: rmse 0.002022282 max resid 0.01326022
## Run 3 stress 0.1697564
## ... New best solution
## ... Procrustes: rmse 0.0192328 max resid 0.1507751
## Run 4 stress 0.1791563
## Run 5 stress 0.1957019
## Run 6 stress 0.1776067
## Run 7 stress 0.1861999
## Run 8 stress 0.1841555
## Run 9 stress 0.1915089
## Run 10 stress 0.1705142
## Run 11 stress 0.1844876
## Run 12 stress 0.1922407
## Run 13 stress 0.1737096
## Run 14 stress 0.1960469
## Run 15 stress 0.189511
## Run 16 stress 0.1874134
## Run 17 stress 0.1853467
## Run 18 stress 0.1694329
## ... New best solution
## ... Procrustes: rmse 0.01733225 max resid 0.1679935
## Run 19 stress 0.1848981
## Run 20 stress 0.1885693
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
Here we are only focusing on epileptic dogs.
ps.sub.epi <- ps.sub %>%
subset_samples(Epileptic.or.Control == 'Epileptic')
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
plot_ord(ps.sub.epi, 'Pheno.Y.N','MDS','bray')
plot_ord(ps.sub.epi, 'Pheno.Y.N','MDS','wunifrac')
plot_ord(ps.sub.epi, 'Pheno.Y.N','NMDS','bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2268725
## Run 1 stress 0.2483844
## Run 2 stress 0.2482238
## Run 3 stress 0.2425354
## Run 4 stress 0.2410384
## Run 5 stress 0.2497967
## Run 6 stress 0.2363233
## Run 7 stress 0.2487216
## Run 8 stress 0.2420779
## Run 9 stress 0.2505208
## Run 10 stress 0.2443005
## Run 11 stress 0.2574419
## Run 12 stress 0.2470963
## Run 13 stress 0.2564868
## Run 14 stress 0.228107
## Run 15 stress 0.228107
## Run 16 stress 0.2268724
## ... New best solution
## ... Procrustes: rmse 3.034855e-05 max resid 0.0001101721
## ... Similar to previous best
## Run 17 stress 0.2463383
## Run 18 stress 0.2427176
## Run 19 stress 0.2434112
## Run 20 stress 0.2284241
## *** Best solution repeated 1 times
plot_ord(ps.sub.epi, 'Pheno.Y.N','NMDS','wunifrac')
## Run 0 stress 0.1883217
## Run 1 stress 0.1856734
## ... New best solution
## ... Procrustes: rmse 0.05735036 max resid 0.2495846
## Run 2 stress 0.184041
## ... New best solution
## ... Procrustes: rmse 0.0596288 max resid 0.2049456
## Run 3 stress 0.194911
## Run 4 stress 0.1879386
## Run 5 stress 0.1843693
## ... Procrustes: rmse 0.01670532 max resid 0.1044052
## Run 6 stress 0.1907494
## Run 7 stress 0.1897037
## Run 8 stress 0.1930448
## Run 9 stress 0.1875274
## Run 10 stress 0.1979013
## Run 11 stress 0.2008512
## Run 12 stress 0.1897458
## Run 13 stress 0.1923424
## Run 14 stress 0.1907491
## Run 15 stress 0.195468
## Run 16 stress 0.1963123
## Run 17 stress 0.185802
## Run 18 stress 0.1960908
## Run 19 stress 0.1820523
## ... New best solution
## ... Procrustes: rmse 0.07432723 max resid 0.2571129
## Run 20 stress 0.182001
## ... New best solution
## ... Procrustes: rmse 0.002509384 max resid 0.01489542
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 1: no. of iterations >= maxit
## 19: stress ratio > sratmax
Drug usage does not seem like a significant factor.
ps.age <- ps.sub %>%
subset_samples(Age..months. != 'NA')
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
sample_data(ps.age)$Age..months. <- sample_data(ps.age)$Age..months. %>%
as.numeric()
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'RNeXML'
ps.age # two samples does not contain age information
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 328 taxa and 96 samples ]
## sample_data() Sample Data: [ 96 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 328 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 328 tips and 327 internal nodes ]
## refseq() DNAStringSet: [ 328 reference sequences ]
dist.matrix <- phyloseq::distance(ps.age, method='bray')
sam.age <- sample_data(ps.age) %>% data.frame()
adonis2(dist.matrix~Age..months., data = sam.age, permutations=1e4, strata = sam.age$Household)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 10000
##
## adonis2(formula = dist.matrix ~ Age..months., data = sam.age, permutations = 10000, strata = sam.age$Household)
## Df SumOfSqs R2 F Pr(>F)
## Age..months. 1 0.2537 0.01513 1.4442 0.2869
## Residual 94 16.5094 0.98487
## Total 95 16.7630 1.00000
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Asia/Shanghai
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ade4_1.7-22 vegan_2.6-6.1 lattice_0.22-6 permute_0.9-7
## [5] phyloseq_1.48.0 here_1.0.1 lubridate_1.9.3 forcats_1.0.0
## [9] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [13] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.16.0
## [3] jsonlite_1.8.8 magrittr_2.0.3
## [5] farver_2.1.2 rmarkdown_2.27
## [7] zlibbioc_1.50.0 vctrs_0.6.5
## [9] multtest_2.60.0 Rsamtools_2.20.0
## [11] htmltools_0.5.8.1 S4Arrays_1.4.0
## [13] progress_1.2.3 Rhdf5lib_1.26.0
## [15] SparseArray_1.4.1 rhdf5_2.48.0
## [17] adegenet_2.1.10 sass_0.4.9
## [19] bslib_0.7.0 plyr_1.8.9
## [21] cachem_1.1.0 uuid_1.2-0
## [23] GenomicAlignments_1.40.0 igraph_2.0.3
## [25] mime_0.12 lifecycle_1.0.4
## [27] iterators_1.0.14 pkgconfig_2.0.3
## [29] Matrix_1.7-0 R6_2.5.1
## [31] fastmap_1.2.0 GenomeInfoDbData_1.2.12
## [33] MatrixGenerics_1.16.0 shiny_1.8.1.1
## [35] digest_0.6.35 colorspace_2.1-0
## [37] ShortRead_1.62.0 S4Vectors_0.42.0
## [39] phylobase_0.8.12 rprojroot_2.0.4
## [41] GenomicRanges_1.56.0 hwriter_1.3.2.1
## [43] labeling_0.4.3 fansi_1.0.6
## [45] timechange_0.3.0 httr_1.4.7
## [47] abind_1.4-5 mgcv_1.9-1
## [49] compiler_4.4.0 withr_3.0.0
## [51] BiocParallel_1.38.0 highr_0.11
## [53] MASS_7.3-60.2 DelayedArray_0.30.1
## [55] biomformat_1.32.0 tools_4.4.0
## [57] rncl_0.8.7 ape_5.8
## [59] httpuv_1.6.15 glue_1.7.0
## [61] dada2_1.32.0 nlme_3.1-164
## [63] rhdf5filters_1.16.0 promises_1.3.0
## [65] grid_4.4.0 cluster_2.1.6
## [67] reshape2_1.4.4 generics_0.1.3
## [69] seqinr_4.2-36 gtable_0.3.5
## [71] tzdb_0.4.0 data.table_1.15.4
## [73] hms_1.1.3 xml2_1.3.6
## [75] utf8_1.2.4 XVector_0.44.0
## [77] BiocGenerics_0.50.0 foreach_1.5.2
## [79] pillar_1.9.0 later_1.3.2
## [81] splines_4.4.0 deldir_2.0-4
## [83] survival_3.6-4 tidyselect_1.2.1
## [85] Biostrings_2.72.0 knitr_1.47
## [87] IRanges_2.38.0 SummarizedExperiment_1.34.0
## [89] stats4_4.4.0 xfun_0.44
## [91] Biobase_2.64.0 matrixStats_1.3.0
## [93] stringi_1.8.4 UCSC.utils_1.0.0
## [95] yaml_2.3.8 evaluate_0.23
## [97] codetools_0.2-20 interp_1.1-6
## [99] cli_3.6.2 RcppParallel_5.1.7
## [101] xtable_1.8-4 munsell_0.5.1
## [103] jquerylib_0.1.4 Rcpp_1.0.12
## [105] GenomeInfoDb_1.40.0 png_0.1-8
## [107] XML_3.99-0.16.1 parallel_4.4.0
## [109] RNeXML_2.4.11 prettyunits_1.2.0
## [111] latticeExtra_0.6-30 jpeg_0.1-10
## [113] bitops_1.0-7 pwalign_1.0.0
## [115] scales_1.3.0 crayon_1.5.2
## [117] rlang_1.1.3